This R Markdown document generates the reproducibility report for “ADH and ALDH gene regulation” study.
First, we run CoDeS3D pipeline (gene mode, GTEx eqtl project) to get regulatory interactions for each of ADH and ALDH gene across all GTEx (v8) tissues: python codes3d/codes3d.py -g ALDH1A1 -o results/codes3d/ALDH1A1 --eqtl-project GTEx.
# 6 ADH genes; there were no significant interactions for ADH7 gene
ADH1A <- read.table("results/codes3d/ADH1A/significant_eqtls.txt", header = TRUE, sep = "\t")
ADH1B <- read.table("results/codes3d/ADH1B/significant_eqtls.txt", header = TRUE, sep = "\t")
ADH1C <- read.table("results/codes3d/ADH1C/significant_eqtls.txt", header = TRUE, sep = "\t")
ADH4 <- read.table("results/codes3d/ADH4/significant_eqtls.txt", header = TRUE, sep = "\t")
ADH5 <- read.table("results/codes3d/ADH5/significant_eqtls.txt", header = TRUE, sep = "\t")
ADH6 <- read.table("results/codes3d/ADH6/significant_eqtls.txt", header = TRUE, sep = "\t")
# 17 ALDH genes; there were no ignificant interactions for ALDH8A1 and ALDH9A1 genes
ALDH16A1 <- read.table("results/codes3d/ALDH16A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH18A1 <- read.table("results/codes3d/ALDH18A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1A1 <- read.table("results/codes3d/ALDH1A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1A2 <- read.table("results/codes3d/ALDH1A2/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1A3 <- read.table("results/codes3d/ALDH1A3/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1B1 <- read.table("results/codes3d/ALDH1B1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1L1 <- read.table("results/codes3d/ALDH1L1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH1L2 <- read.table("results/codes3d/ALDH1L2/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH2 <- read.table("results/codes3d/ALDH2/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH3A1 <- read.table("results/codes3d/ALDH3A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH3A2 <- read.table("results/codes3d/ALDH3A2/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH3B1 <- read.table("results/codes3d/ALDH3B1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH3B2 <- read.table("results/codes3d/ALDH3B2/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH4A1 <- read.table("results/codes3d/ALDH4A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH5A1 <- read.table("results/codes3d/ALDH5A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH6A1 <- read.table("results/codes3d/ALDH6A1/significant_eqtls.txt", header = TRUE, sep = "\t")
ALDH7A1 <- read.table("results/codes3d/ALDH7A1/significant_eqtls.txt", header = TRUE, sep = "\t")
# remove interactions with missing rsID number for snp
# 6 ADH genes
ADH1A <- ADH1A[ADH1A$snp != ".", ]; ADH1B <- ADH1B[ADH1B$snp != ".", ]
ADH1C <- ADH1C[ADH1C$snp != ".", ]; ADH4 <- ADH4[ADH4$snp != ".", ]
ADH5 <- ADH5[ADH5$snp != ".", ]; ADH6 <- ADH6[ADH6$snp != ".", ]
# 17 ALDH genes
ALDH16A1 <- ALDH16A1[ALDH16A1$snp != ".", ]; ALDH18A1 <- ALDH18A1[ALDH18A1$snp != ".", ]
ALDH1A1 <- ALDH1A1[ALDH1A1$snp != ".", ]; ALDH1A2 <- ALDH1A2[ALDH1A2$snp != ".", ]
ALDH1A3 <- ALDH1A3[ALDH1A3$snp != ".", ]; ALDH1B1 <- ALDH1B1[ALDH1B1$snp != ".", ]
ALDH1L1 <- ALDH1L1[ALDH1L1$snp != ".", ]; ALDH1L2 <- ALDH1L2[ALDH1L2$snp != ".", ]
ALDH2 <- ALDH2[ALDH2$snp != ".", ]; ALDH3A1 <- ALDH3A1[ALDH3A1$snp != ".", ]
ALDH3A2 <- ALDH3A2[ALDH3A2$snp != ".", ]; ALDH3B1 <- ALDH3B1[ALDH3B1$snp != ".", ]
ALDH3B2 <- ALDH3B2[ALDH3B2$snp != ".", ]; ALDH4A1 <- ALDH4A1[ALDH4A1$snp != ".", ]
ALDH5A1 <- ALDH5A1[ALDH5A1$snp != ".", ]; ALDH6A1 <- ALDH6A1[ALDH6A1$snp != ".", ]
ALDH7A1 <- ALDH7A1[ALDH7A1$snp != ".", ]
Extracting eQTL SNPs impacting ADH and ALDH gene expression.
# 6 ADH genes
ADH1A_esnps <- unique(ADH1A$snp); ADH1B_esnps <- unique(ADH1B$snp)
ADH1C_esnps <- unique(ADH1C$snp); ADH4_esnps <- unique(ADH4$snp)
ADH5_esnps <- unique(ADH5$snp); ADH6_esnps <- unique(ADH6$snp)
# 17 ALDH genes
ALDH16A1_esnps <- unique(ALDH16A1$snp); ALDH18A1_esnps <- unique(ALDH18A1$snp)
ALDH1A1_esnps <- unique(ALDH1A1$snp); ALDH1A2_esnps <- unique(ALDH1A2$snp)
ALDH1A3_esnps <- unique(ALDH1A3$snp); ALDH1B1_esnps <- unique(ALDH1B1$snp)
ALDH1L1_esnps <- unique(ALDH1L1$snp); ALDH1L2_esnps <- unique(ALDH1L2$snp)
ALDH2_esnps <- unique(ALDH2$snp); ALDH3A1_esnps <- unique(ALDH3A1$snp)
ALDH3A2_esnps <- unique(ALDH3A2$snp); ALDH3B1_esnps <- unique(ALDH3B1$snp)
ALDH3B2_esnps <- unique(ALDH3B2$snp); ALDH4A1_esnps <- unique(ALDH4A1$snp)
ALDH5A1_esnps <- unique(ALDH5A1$snp); ALDH6A1_esnps <- unique(ALDH6A1$snp)
ALDH7A1_esnps <- unique(ALDH7A1$snp)
Extracting affected tissues.
# 6 ADH genes
ADH1A_tissues <- unique(ADH1A$tissue); ADH1B_tissues <- unique(ADH1B$tissue)
ADH1C_tissues <- unique(ADH1C$tissue); ADH4_tissues <- unique(ADH4$tissue)
ADH5_tissues <- unique(ADH5$tissue); ADH6_tissues <- unique(ADH6$tissue)
# 17 ALDH genes
ALDH16A1_tissues <- unique(ALDH16A1$tissue); ALDH18A1_tissues <- unique(ALDH18A1$tissue)
ALDH1A1_tissues <- unique(ALDH1A1$tissue); ALDH1A2_tissues <- unique(ALDH1A2$tissue)
ALDH1A3_tissues <- unique(ALDH1A3$tissue); ALDH1B1_tissues <- unique(ALDH1B1$tissue)
ALDH1L1_tissues <- unique(ALDH1L1$tissue); ALDH1L2_tissues <- unique(ALDH1L2$tissue)
ALDH2_tissues <- unique(ALDH2$tissue); ALDH3A1_tissues <- unique(ALDH3A1$tissue)
ALDH3A2_tissues <- unique(ALDH3A2$tissue); ALDH3B1_tissues <- unique(ALDH3B1$tissue)
ALDH3B2_tissues <- unique(ALDH3B2$tissue); ALDH4A1_tissues <- unique(ALDH4A1$tissue)
ALDH5A1_tissues <- unique(ALDH5A1$tissue); ALDH6A1_tissues <- unique(ALDH6A1$tissue)
ALDH7A1_tissues <- unique(ALDH7A1$tissue)
# getting gene expression for all genes across all gtex tissues
gtex <- gzfile("data/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz",'rt')
gtex_expr <- read.table(gtex, skip = 2, sep = "\t", header = TRUE)
adh_aldh_genes <- c("ADH1A", "ADH1B", "ADH1C", "ADH4", "ADH5", "ADH6", "ADH7", "ALDH16A1", "ALDH18A1",
"ALDH1A1", "ALDH1A2", "ALDH1A3", "ALDH1B1", "ALDH1L1", "ALDH1L2", "ALDH2",
"ALDH3A1", "ALDH3A2", "ALDH3B1", "ALDH3B2", "ALDH4A1", "ALDH5A1", "ALDH6A1",
"ALDH7A1", "ALDH8A1", "ALDH9A1")
adh_aldh_expr <- gtex_expr[gtex_expr$Description %in% adh_aldh_genes, ]
# fix column names
colnames(adh_aldh_expr) <- sub("[.]$", "", colnames(adh_aldh_expr)) # remove dot in the end
colnames(adh_aldh_expr) <- gsub("\\.{1,}", "_", colnames(adh_aldh_expr)) # replace any dot with underscore
# Heatmap of log10(TPM + 1) gene expression across 49 GTEx tissues
#adh_aldh_expr_melt <- melt(adh_aldh_expr, id="Description", measure.vars = colnames(adh_aldh_expr[3:56])); colnames(adh_aldh_expr_melt) <- c("name", "tissue", "tpm")
#adh_aldh_expr_melt$log10tpm <- log10(adh_aldh_expr_melt$tpm + 1) # transform to log10(TPM + 1)
adh_aldh_expr[,3:56] <- log10(adh_aldh_expr[,3:56]+1)
adh_aldh_expr_sub <- adh_aldh_expr[2:56]
adh_aldh_expr_sub <- remove_rownames(adh_aldh_expr_sub) %>% column_to_rownames(var="Description")
adh_aldh_mat <- as.matrix(adh_aldh_expr_sub) # convert to matrix
colors = colorRamp2(c(0, max(adh_aldh_mat)), c("white","black"), space = "RGB")
#pdf("figures/adh_aldh_gene_expression_heatmap.pdf", width = 16, height = 16)
ht_ge <- Heatmap(t(adh_aldh_mat), name = "log10(TPM+1)",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = adh_aldh_genes,
width = unit(15, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(ht_ge, heatmap_legend_side = "bottom")
#dev.off()
# This function checks up- and downregulation of ADH and ALDH genes in GTEx tissues. It takes codes3d input as a dataframe and returns if the gene is not expressed, up-, down-, or up/downregulated
all_t <- rownames(t(adh_aldh_mat))
up_down <- function(x){
ts <- unique(x$tissue) # vector of tissues
new <- c()
for (t in 1:length(all_t)){
if (all_t[t] %in% ts){
afs <- x[x$tissue==ts[t],]$log2_aFC # vector of effect sizes
if (all(na.omit(afs) > 0)){
new <- c(new, "up")
} else if (all(na.omit(afs) < 0)) {
new <- c(new, "down")
} else if (all(na.omit(afs) == 0)) {
new <- c(new, NA)
} else {
new <- c(new, "both")
}
} else{
new <- c(new, NA)
}
}
new
}
# create a dataframe and populate it with up- and downregulation values
df <- data.frame(ADH1A = up_down(ADH1A), ADH1B = up_down(ADH1B), ADH1C = up_down(ADH1C),
ADH4 = up_down(ADH4), ADH5 = up_down(ADH5), ADH6 = up_down(ADH6),
ALDH16A1 = up_down(ALDH16A1), ALDH18A1 = up_down(ALDH18A1),
ALDH1A1 = up_down(ALDH1A1), ALDH1A2 = up_down(ALDH1A2),
ALDH1A3 = up_down(ALDH1A3), ALDH1B1 = up_down(ALDH1B1),
ALDH1L1 = up_down(ALDH1L1), ALDH1L2 = up_down(ALDH1L2), ALDH2 = up_down(ALDH2),
ALDH3A1 = up_down(ALDH3A1), ALDH3A2 = up_down(ALDH3A2),
ALDH3B1 = up_down(ALDH3B1), ALDH3B2 = up_down(ALDH3B2),
ALDH4A1 = up_down(ALDH4A1), ALDH5A1 = up_down(ALDH5A1),
ALDH6A1 = up_down(ALDH6A1), ALDH7A1 = up_down(ALDH7A1),
row.names = rownames(t(adh_aldh_mat)))
discrete_mat <- as.matrix(df)
colors = structure(c("#ED1C24", "#1C75BC", "black"), names = c("up", "down", "both"))
adh_aldh_egenes <- c("ADH1A", "ADH1B", "ADH1C", "ADH4", "ADH5", "ADH6", "ALDH16A1", "ALDH18A1",
"ALDH1A1", "ALDH1A2", "ALDH1A3", "ALDH1B1", "ALDH1L1", "ALDH1L2", "ALDH2",
"ALDH3A1", "ALDH3A2", "ALDH3B1", "ALDH3B2", "ALDH4A1", "ALDH5A1", "ALDH6A1",
"ALDH7A1")
#pdf("figures/adh_aldh_gene_regulation_heatmap.pdf", width = 16, height = 16)
ht_gr <- Heatmap(discrete_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = adh_aldh_egenes,
width = unit(14, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(ht_gr, heatmap_legend_side = "bottom")
#dev.off()
To determine if ADH and ALDH gene eQTLs are associated with GWAS traits we used LDtraitR package. Register here to access LDtrait API with R. You’ll get your own token. In the function below specify your own token. This analysis was done on 05/02/2021. GWAS Catalog last updated on 05/02/2021, 07:05 PM (GMT+1300).
# This function queriies LDtrait. It takes a vector of eQTLs, splits the vector into list of vectors of length 50 (or less) and sends a request for each of these vectors. It outputs the dataframe with the query results for all eQTLs. Uncomment the lines to query LDtrait.
query_LDtrait <- function(esnps){
df <- data.frame()
max <- 50
ind <- seq_along(esnps)
esnps_splits <- split(esnps, ceiling(ind/max))
cat("Total number of SNP chunks to analyse: ", length(esnps_splits), "\n")
for(i in 1:length(esnps_splits)){
#skip_to_next <- FALSE
cat("Analysing SNP chunk: ", i, "\n")
tryCatch({
snps <- unlist(esnps_splits[i], use.names=FALSE)
t <- LDtrait(snps = snps, pop = "CEU", r2d = "r2", r2d_threshold = "0.8",
win_size = "500000", token = "your_token_here")
df <- rbind(df, t)
}, error=function(e){
cat("ERROR: ", conditionMessage(e), "\n")
#skip_to_next <<- TRUE
})
#if(skip_to_next){ next }
}
return(df)
}
## 6 ADH genes
#ADH1A_df <- query_LDtrait(ADH1A_esnps) # all associations
#ADH1A_df$P_value <- as.numeric(ADH1A_df$P_value)
#write.table(ADH1A_df, file = "results/LD/ADH1A_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH1A_df <- read.table(file = "results/LD/ADH1A_LDtraits.txt", sep = "\t", header = TRUE)
ADH1A_df_gw <- subset(ADH1A_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH1A_df_gw, file = "results/LD/ADH1A_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ADH1B_df <- query_LDtrait(ADH1B_esnps) # all associations
#ADH1B_df$P_value <- as.numeric(ADH1B_df$P_value)
#write.table(ADH1B_df, file = "results/LD/ADH1B_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH1B_df <- read.table(file = "results/LD/ADH1B_LDtraits.txt", sep = "\t", header = TRUE)
ADH1B_df_gw <- subset(ADH1B_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH1B_df_gw, file = "results/LD/ADH1B_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ADH1C_df <- query_LDtrait(ADH1C_esnps) # all associations
#ADH1C_df$P_value <- as.numeric(ADH1C_df$P_value)
#write.table(ADH1C_df, file = "results/LD/ADH1C_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH1C_df <- read.table(file = "results/LD/ADH1C_LDtraits.txt", sep = "\t", header = TRUE)
ADH1C_df_gw <- subset(ADH1C_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH1C_df_gw, file = "results/LD/ADH1C_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ADH4_df <- query_LDtrait(ADH4_esnps) # all associations
#ADH4_df$P_value <- as.numeric(ADH4_df$P_value)
#write.table(ADH4_df, file = "results/LD/ADH4_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH4_df <- read.table(file = "results/LD/ADH4_LDtraits.txt", sep = "\t", header = TRUE)
ADH4_df_gw <- subset(ADH4_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH4_df_gw, file = "results/LD/ADH4_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ADH5_df <- query_LDtrait(ADH5_esnps) # all associations
#ADH5_df$P_value <- as.numeric(ADH5_df$P_value)
#write.table(ADH5_df, file = "results/LD/ADH5_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH5_df <- read.table(file = "results/LD/ADH5_LDtraits.txt", sep = "\t", header = TRUE)
ADH5_df_gw <- subset(ADH5_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH5_df_gw, file = "results/LD/ADH5_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ADH6_df <- query_LDtrait(ADH6_esnps) # all associations
#ADH6_df$P_value <- as.numeric(ADH6_df$P_value)
#write.table(ADH6_df, file = "results/LD/ADH6_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ADH6_df <- read.table(file = "results/LD/ADH6_LDtraits.txt", sep = "\t", header = TRUE)
ADH6_df_gw <- subset(ADH6_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ADH6_df_gw, file = "results/LD/ADH6_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
## 17 ALDH genes
#ALDH16A1_df <- query_LDtrait(ALDH16A1_esnps) # all associations
#ALDH16A1_df$P_value <- as.numeric(ALDH16A1_df$P_value)
#write.table(ALDH16A1_df, file = "results/LD/ALDH16A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH16A1_df <- read.table(file = "results/LD/ALDH16A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH16A1_df_gw <- subset(ALDH16A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH16A1_df_gw, file = "results/LD/ALDH16A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH18A1_df <- query_LDtrait(ALDH18A1_esnps) # all associations
#ALDH18A1_df$P_value <- as.numeric(ALDH18A1_df$P_value)
#write.table(ALDH18A1_df, file = "results/LD/ALDH18A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH18A1_df <- read.table(file = "results/LD/ALDH18A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH18A1_df_gw <- subset(ALDH18A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH18A1_df_gw, file = "results/LD/ALDH18A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1A1_df <- query_LDtrait(ALDH1A1_esnps) # all associations
#ALDH1A1_df$P_value <- as.numeric(ALDH1A1_df$P_value)
#write.table(ALDH1A1_df, file = "results/LD/ALDH1A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1A1_df <- read.table(file = "results/LD/ALDH1A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1A1_df_gw <- subset(ALDH1A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1A1_df_gw, file = "results/LD/ALDH1A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1A2_df <- query_LDtrait(ALDH1A2_esnps) # all associations
#ALDH1A2_df$P_value <- as.numeric(ALDH1A2_df$P_value)
#write.table(ALDH1A2_df, file = "results/LD/ALDH1A2_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1A2_df <- read.table(file = "results/LD/ALDH1A2_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1A2_df_gw <- subset(ALDH1A2_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1A2_df_gw, file = "results/LD/ALDH1A2_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1A3_df <- query_LDtrait(ALDH1A3_esnps) # all associations
#ALDH1A3_df$P_value <- as.numeric(ALDH1A3_df$P_value)
#write.table(ALDH1A3_df, file = "results/LD/ALDH1A3_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1A3_df <- read.table(file = "results/LD/ALDH1A3_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1A3_df_gw <- subset(ALDH1A3_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1A3_df_gw, file = "results/LD/ALDH1A3_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1B1_df <- query_LDtrait(ALDH1B1_esnps) # all associations
#ALDH1B1_df$P_value <- as.numeric(ALDH1B1_df$P_value)
#write.table(ALDH1B1_df, file = "results/LD/ALDH1B1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1B1_df <- read.table(file = "results/LD/ALDH1B1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1B1_df_gw <- subset(ALDH1B1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1B1_df_gw, file = "results/LD/ALDH1B1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1L1_df <- query_LDtrait(ALDH1L1_esnps) # all associations
#ALDH1L1_df$P_value <- as.numeric(ALDH1L1_df$P_value)
#write.table(ALDH1L1_df, file = "results/LD/ALDH1L1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1L1_df <- read.table(file = "results/LD/ALDH1L1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1L1_df_gw <- subset(ALDH1L1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1L1_df_gw, file = "results/LD/ALDH1L1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH1L2_df <- query_LDtrait(ALDH1L2_esnps) # all associations
#ALDH1L2_df$P_value <- as.numeric(ALDH1L2_df$P_value)
#write.table(ALDH1L2_df, file = "results/LD/ALDH1L2_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH1L2_df <- read.table(file = "results/LD/ALDH1L2_LDtraits.txt", sep = "\t", header = TRUE)
ALDH1L2_df_gw <- subset(ALDH1L2_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH1L2_df_gw, file = "results/LD/ALDH1L2_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH2_df <- query_LDtrait(ALDH2_esnps) # all associations
#ALDH2_df$P_value <- as.numeric(ALDH2_df$P_value)
#write.table(ALDH2_df, file = "results/LD/ALDH2_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH2_df <- read.table(file = "results/LD/ALDH2_LDtraits.txt", sep = "\t", header = TRUE)
ALDH2_df_gw <- subset(ALDH2_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH2_df_gw, file = "results/LD/ALDH2_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH3A1_df <- query_LDtrait(ALDH3A1_esnps) # all associations
#ALDH3A1_df$P_value <- as.numeric(ALDH3A1_df$P_value)
#write.table(ALDH3A1_df, file = "results/LD/ALDH3A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH3A1_df <- read.table(file = "results/LD/ALDH3A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH3A1_df_gw <- subset(ALDH3A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH3A1_df_gw, file = "results/LD/ALDH3A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH3A2_df <- query_LDtrait(ALDH3A2_esnps) # all associations
#ALDH3A2_df$P_value <- as.numeric(ALDH3A2_df$P_value)
#write.table(ALDH3A2_df, file = "results/LD/ALDH3A2_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH3A2_df <- read.table(file = "results/LD/ALDH3A2_LDtraits.txt", sep = "\t", header = TRUE)
ALDH3A2_df_gw <- subset(ALDH3A2_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH3A2_df_gw, file = "results/LD/ALDH3A2_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH3B1_df <- query_LDtrait(ALDH3B1_esnps) # all associations
#ALDH3B1_df$P_value <- as.numeric(ALDH3B1_df$P_value)
#write.table(ALDH3B1_df, file = "results/LD/ALDH3B1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH3B1_df <- read.table(file = "results/LD/ALDH3B1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH3B1_df_gw <- subset(ALDH3B1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH3B1_df_gw, file = "results/LD/ALDH3B1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH3B2_df <- query_LDtrait(ALDH3B2_esnps) # all associations
#ALDH3B2_df$P_value <- as.numeric(ALDH3B2_df$P_value)
#write.table(ALDH3B2_df, file = "results/LD/ALDH3B2_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH3B2_df <- read.table(file = "results/LD/ALDH3B2_LDtraits.txt", sep = "\t", header = TRUE)
ALDH3B2_df_gw <- subset(ALDH3B2_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH3B2_df_gw, file = "results/LD/ALDH3B2_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH4A1_df <- query_LDtrait(ALDH4A1_esnps) # all associations
#ALDH4A1_df$P_value <- as.numeric(ALDH4A1_df$P_value)
#write.table(ALDH4A1_df, file = "results/LD/ALDH4A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH4A1_df <- read.table(file = "results/LD/ALDH4A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH4A1_df_gw <- subset(ALDH4A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH4A1_df_gw, file = "results/LD/ALDH4A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH5A1_df <- query_LDtrait(ALDH5A1_esnps) # all associations
#ALDH5A1_df$P_value <- as.numeric(ALDH5A1_df$P_value)
#write.table(ALDH5A1_df, file = "results/LD/ALDH5A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH5A1_df <- read.table(file = "results/LD/ALDH5A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH5A1_df_gw <- subset(ALDH5A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH5A1_df_gw, file = "results/LD/ALDH5A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH6A1_df <- query_LDtrait(ALDH6A1_esnps) # all associations
#ALDH6A1_df$P_value <- as.numeric(ALDH6A1_df$P_value)
#write.table(ALDH6A1_df, file = "results/LD/ALDH6A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH6A1_df <- read.table(file = "results/LD/ALDH6A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH6A1_df_gw <- subset(ALDH6A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH6A1_df_gw, file = "results/LD/ALDH6A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
#ALDH7A1_df <- query_LDtrait(ALDH7A1_esnps) # all associations
#ALDH7A1_df$P_value <- as.numeric(ALDH7A1_df$P_value)
#write.table(ALDH7A1_df, file = "results/LD/ALDH7A1_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
ALDH7A1_df <- read.table(file = "results/LD/ALDH7A1_LDtraits.txt", sep = "\t", header = TRUE)
ALDH7A1_df_gw <- subset(ALDH7A1_df, P_value < 0.00000005) # only genome-wide significant associations
#write.table(ALDH7A1_df_gw, file = "results/LD/ALDH7A1_LDtraits_5E-08.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
Here, we plot percentage of all and genome-wide significant associations of ADH and ALDH eQTLs with GWAS traits.
# to get number of ADH1A eQTLs associated with traits: length(unique(ADH1A_df$Query))
# to get number of ADH1A eQTLs: length(unique(ADH1A_esnps))
# 6 ADH genes
# all associations
adh_order <- c('ADH1A', 'ADH1B', 'ADH1C', 'ADH4', 'ADH5', 'ADH6')
ADH_esnps <- c(length(unique(ADH1A_esnps)), length(unique(ADH1B_esnps)),
length(unique(ADH1C_esnps)), length(unique(ADH4_esnps)),
length(unique(ADH5_esnps)), length(unique(ADH6_esnps)))
ADH_esnps_assoc <- c(length(unique(ADH1A_df$Query)), length(unique(ADH1B_df$Query)),
length(unique(ADH1C_df$Query)), length(unique(ADH4_df$Query)),
length(unique(ADH5_df$Query)), length(unique(ADH6_df$Query)))
ADH.df <- data.frame(
gene = rep(adh_order, 2),
esnps = rep(c("with association","without association"), each=6),
number = c(ADH_esnps_assoc, ADH_esnps-ADH_esnps_assoc),
percentage = c(round(ADH_esnps_assoc/ADH_esnps*100, 2),
100-round(ADH_esnps_assoc/ADH_esnps*100, 2)))
#tiff("figures/percent_ADH_eQTLs.tif", units="in", width=10, height=8, res=300)
#pdf("figures/percent_ADH_eQTLs.pdf", width = 10, height = 8)
#jpeg("figures/percent_ADH_eQTLs.jpeg", units="in", width=10, height=8, res=300)
ggplot(ADH.df, aes(x = gene, y = percentage, fill = esnps)) +
geom_bar(stat="identity", position = "stack") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.text=element_text(size=20),
legend.title=element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
axis.text=element_text(size=20, colour = "black"),
axis.text.x=element_text(face = "italic", angle = 45, hjust=1),
axis.title=element_text(size=24, colour = "black"),
strip.text.x = element_text(size = 19, colour = "black")) +
geom_text(aes(y = percentage, label = paste0(number)),
position=position_stack(vjust = 0.5), vjust=-0.25, color = "black",
size = 8, fontface = 'italic') +
labs(y = "Percentage")
#dev.off()
# genome-wide significant associations
ADH_esnps_assoc_gw <- c(length(unique(ADH1A_df_gw$Query)), length(unique(ADH1B_df_gw$Query)),
length(unique(ADH1C_df_gw$Query)), length(unique(ADH4_df_gw$Query)),
length(unique(ADH5_df_gw$Query)), length(unique(ADH6_df_gw$Query)))
ADH_gw.df <- data.frame(
gene = adh_order,
number = ADH_esnps_assoc_gw,
percentage = c(round(ADH_esnps_assoc_gw/ADH_esnps_assoc*100, 2)))
#tiff("figures/percent_ADH_eQTLs_gw.tif", units="in", width=10, height=8, res=300)
#pdf("figures/percent_ADH_eQTLs_gw.pdf", width = 10, height = 8)
#jpeg("figures/percent_ADH_eQTLs_gw.jpeg", units="in", width=10, height=8, res=300)
ggplot(ADH_gw.df, aes(x = gene, y = percentage)) +
geom_bar(stat="identity", position = "identity") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.text=element_text(size=20),
legend.title=element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
axis.text=element_text(size=20, colour = "black"),
axis.text.x=element_text(face = "italic", angle = 45, hjust=1),
axis.title=element_text(size=24, colour = "black"),
strip.text.x = element_text(size = 19, colour = "black")) +
geom_text(aes(y = percentage, label = paste0(number)),
position=position_dodge(width=0.9), vjust=-0.25, color = "black",
size = 8, fontface = 'italic') +
labs(y = "Percentage")
#dev.off()
# 17 ALDH genes
# all associations
aldh_order <- c('ALDH16A1', 'ALDH18A1', 'ALDH1A1', 'ALDH1A2', 'ALDH1A3', 'ALDH1B1', 'ALDH1L1',
'ALDH1L2', 'ALDH2', 'ALDH3A1', 'ALDH3A2', 'ALDH3B1', 'ALDH3B2', 'ALDH4A1', 'ALDH5A1',
'ALDH6A1', 'ALDH7A1')
ALDH_esnps <- c(length(unique(ALDH16A1_esnps)), length(unique(ALDH18A1_esnps)),
length(unique(ALDH1A1_esnps)), length(unique(ALDH1A2_esnps)),
length(unique(ALDH1A3_esnps)), length(unique(ALDH1B1_esnps)),
length(unique(ALDH1L1_esnps)), length(unique(ALDH1L2_esnps)),
length(unique(ALDH2_esnps)), length(unique(ALDH3A1_esnps)),
length(unique(ALDH3A2_esnps)), length(unique(ALDH3B1_esnps)),
length(unique(ALDH3B2_esnps)), length(unique(ALDH4A1_esnps)),
length(unique(ALDH5A1_esnps)), length(unique(ALDH6A1_esnps)),
length(unique(ALDH7A1_esnps)))
ALDH_esnps_assoc <- c(length(unique(ALDH16A1_df$Query)), length(unique(ALDH18A1_df$Query)),
length(unique(ALDH1A1_df$Query)), 0, 0, length(unique(ALDH1B1_df$Query)),
length(unique(ALDH1L1_df$Query)), length(unique(ALDH1L2_df$Query)),
length(unique(ALDH2_df$Query)), length(unique(ALDH3A1_df$Query)),
length(unique(ALDH3A2_df$Query)), length(unique(ALDH3B1_df$Query)),
length(unique(ALDH3B2_df$Query)), 0, length(unique(ALDH5A1_df$Query)),
length(unique(ALDH6A1_df$Query)), length(unique(ALDH7A1_df$Query)))
ALDH.df <- data.frame(
gene = rep(aldh_order, 2),
esnps = rep(c("with association","without association"), each=17),
number = c(ALDH_esnps_assoc, ALDH_esnps-ALDH_esnps_assoc),
percentage = c(round(ALDH_esnps_assoc/ALDH_esnps*100, 2),
100-round(ALDH_esnps_assoc/ALDH_esnps*100, 2)))
#tiff("figures/percent_ALDH_eQTLs.tif", units="in", width=20, height=8, res=300)
#pdf("figures/percent_ALDH_eQTLs.pdf", width = 20, height = 8)
#jpeg("figures/percent_ALDH_eQTLs.jpeg", units="in", width=20, height=8, res=300)
ggplot(ALDH.df, aes(x = gene, y = percentage, fill = esnps)) +
geom_bar(stat="identity", position = "stack") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.text=element_text(size=20),
legend.title=element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
axis.text=element_text(size=20, colour = "black"),
axis.text.x=element_text(face = "italic", angle = 45, hjust=1),
axis.title=element_text(size=24, colour = "black"),
strip.text.x = element_text(size = 19, colour = "black")) +
geom_text(aes(y = percentage, label = paste0(number)),
position=position_stack(vjust = 0.4), vjust=-0.25, color = "black",
size = 8, fontface = 'italic') +
labs(y = "Percentage")
#dev.off()
# genome-wide significant associations
ALDH_esnps_assoc_gw <- c(length(unique(ALDH16A1_df_gw$Query)), length(unique(ALDH18A1_df_gw$Query)),
length(unique(ALDH1A1_df_gw$Query)), 0, 0, length(unique(ALDH1B1_df_gw$Query)),
length(unique(ALDH1L1_df_gw$Query)), length(unique(ALDH1L2_df_gw$Query)),
length(unique(ALDH2_df_gw$Query)), length(unique(ALDH3A1_df_gw$Query)),
length(unique(ALDH3A2_df_gw$Query)), length(unique(ALDH3B1_df_gw$Query)),
length(unique(ALDH3B2_df_gw$Query)), 0, length(unique(ALDH5A1_df_gw$Query)),
length(unique(ALDH6A1_df_gw$Query)), length(unique(ALDH7A1_df_gw$Query)))
ALDH_gw.df <- data.frame(
gene = aldh_order,
number = ALDH_esnps_assoc_gw,
percentage = c(round(ALDH_esnps_assoc_gw/ALDH_esnps_assoc*100, 2)))
#tiff("figures/percent_ALDH_eQTLs_gw.tif", units="in", width=20, height=8, res=300)
#pdf("figures/percent_ALDH_eQTLs_gw.pdf", width = 20, height = 8)
#jpeg("figures/percent_ADH_eQTLs_gw.jpeg", units="in", width=20, height=8, res=300)
ggplot(ALDH_gw.df, aes(x = gene, y = percentage)) +
geom_bar(stat="identity", position = "identity") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.text=element_text(size=20),
legend.title=element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
axis.text=element_text(size=20, colour = "black"),
axis.text.x=element_text(face = "italic", angle = 45, hjust=1),
axis.title=element_text(size=24, colour = "black"),
strip.text.x = element_text(size = 19, colour = "black")) +
geom_text(aes(y = percentage, label = paste0(number)),
position=position_dodge(width=0.9), vjust=-0.25, color = "black",
size = 8, fontface = 'italic') +
labs(y = "Percentage")
## Warning: Removed 3 rows containing missing values (geom_bar).
## Warning: Removed 3 rows containing missing values (geom_text).
#dev.off()
Get number of ADH and ALDH eQTL associations with GWAS trait.
# 6 ADH genes
ADH_traits_df <- data.frame()
ADH1A_melt <- melt(ADH1A_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH1A_melt <- dcast(ADH1A_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH1A_melt$gene <- "ADH1A"
ADH_traits_df <- rbind(ADH_traits_df, ADH1A_melt)
ADH1B_melt <- melt(ADH1B_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH1B_melt <- dcast(ADH1B_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH1B_melt$gene <- "ADH1B"
ADH_traits_df <- rbind(ADH_traits_df, ADH1B_melt)
ADH1C_melt <- melt(ADH1C_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH1C_melt <- dcast(ADH1C_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH1C_melt$gene <- "ADH1C"
ADH_traits_df <- rbind(ADH_traits_df, ADH1C_melt)
ADH4_melt <- melt(ADH4_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH4_melt <- dcast(ADH4_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH4_melt$gene <- "ADH4"
ADH_traits_df <- rbind(ADH_traits_df, ADH4_melt)
ADH5_melt <- melt(ADH5_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH5_melt <- dcast(ADH5_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH5_melt$gene <- "ADH5"
ADH_traits_df <- rbind(ADH_traits_df, ADH5_melt)
ADH6_melt <- melt(ADH6_df_gw, id="Query", measure.vars = "GWAS_Trait")
ADH6_melt <- dcast(ADH6_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ADH6_melt$gene <- "ADH6"
ADH_traits_df <- rbind(ADH_traits_df, ADH6_melt)
#length(unique(ADH_traits_df$value)) # 107 unique traits for ADH genes
#writeLines(unique(ADH_traits_df$value), con = "results/LD/unique_ADH_LDtraits.txt", sep = "\n")
#write.table(ADH_traits_df, file = "results/LD/ADH_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
# 17 - 3 = 14 ALDH genes
ALDH_traits_df <- data.frame()
ALDH16A1_melt <- melt(ALDH16A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH16A1_melt <- dcast(ALDH16A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH16A1_melt$gene <- "ALDH16A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH16A1_melt)
ALDH18A1_melt <- melt(ALDH18A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH18A1_melt <- dcast(ALDH18A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH18A1_melt$gene <- "ALDH18A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH18A1_melt)
ALDH1A1_melt <- melt(ALDH1A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH1A1_melt <- dcast(ALDH1A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH1A1_melt$gene <- "ALDH1A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH1A1_melt)
ALDH1B1_melt <- melt(ALDH1B1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH1B1_melt <- dcast(ALDH1B1_melt, value ~ variable)
ALDH1B1_melt$gene <- "ALDH1B1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH1B1_melt)
ALDH1L1_melt <- melt(ALDH1L1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH1L1_melt <- dcast(ALDH1L1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH1L1_melt$gene <- "ALDH1L1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH1L1_melt)
ALDH1L2_melt <- melt(ALDH1L2_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH1L2_melt <- dcast(ALDH1L2_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH1L2_melt$gene <- "ALDH1L2"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH1L2_melt)
ALDH2_melt <- melt(ALDH2_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH2_melt <- dcast(ALDH2_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH2_melt$gene <- "ALDH2"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH2_melt)
ALDH3A1_melt <- melt(ALDH3A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH3A1_melt <- dcast(ALDH3A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH3A1_melt$gene <- "ALDH3A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH3A1_melt)
ALDH3A2_melt <- melt(ALDH3A2_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH3A2_melt <- dcast(ALDH3A2_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH3A2_melt$gene <- "ALDH3A2"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH3A2_melt)
ALDH3B1_melt <- melt(ALDH3B1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH3B1_melt <- dcast(ALDH3B1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH3B1_melt$gene <- "ALDH3B1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH3B1_melt)
ALDH3B2_melt <- melt(ALDH3B2_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH3B2_melt <- dcast(ALDH3B2_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH3B2_melt$gene <- "ALDH3B2"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH3B2_melt)
ALDH5A1_melt <- melt(ALDH5A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH5A1_melt <- dcast(ALDH5A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH5A1_melt$gene <- "ALDH5A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH5A1_melt)
ALDH6A1_melt <- melt(ALDH6A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH6A1_melt <- dcast(ALDH6A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH6A1_melt$gene <- "ALDH6A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH6A1_melt)
ALDH7A1_melt <- melt(ALDH7A1_df_gw, id="Query", measure.vars = "GWAS_Trait")
ALDH7A1_melt <- dcast(ALDH7A1_melt, value ~ variable)
## Aggregation function missing: defaulting to length
ALDH7A1_melt$gene <- "ALDH7A1"
ALDH_traits_df <- rbind(ALDH_traits_df, ALDH7A1_melt)
#length(unique(ALDH_traits_df$value)) # 302 unique traits for ALDH genes
#writeLines(unique(ALDH_traits_df$value), con = "results/LD/unique_ALDH_LDtraits.txt", sep = "\n")
#write.table(ALDH_traits_df, file = "results/LD/ALDH_LDtraits.txt", sep = "\t", col.names = TRUE, row.names=FALSE)
Get the number of overlapping ADH and ALDH eQTLs.
# 6 ADH genes
# overlapps between ADH eQTLs having trait associateions
ADH1A_gw_esnps <- unique(ADH1A_df_gw$Query)
ADH1B_gw_esnps <- unique(ADH1B_df_gw$Query)
ADH1C_gw_esnps <- unique(ADH1C_df_gw$Query)
ADH4_gw_esnps <- unique(ADH4_df_gw$Query)
ADH5_gw_esnps <- unique(ADH5_df_gw$Query)
ADH6_gw_esnps <- unique(ADH6_df_gw$Query)
list_gw_esnps <- list("ADH1A" = ADH1A_gw_esnps, "ADH1B" = ADH1B_gw_esnps, "ADH1C" = ADH1C_gw_esnps,
"ADH4" = ADH4_gw_esnps, "ADH5" = ADH5_gw_esnps, "ADH6" = ADH6_gw_esnps)
# Ordered by degree
#tiff("figures/ADH_gw_esnp_overlaps.tif", units="in", width=12, height=8, res=300)
#jpeg("figures/ADH_gw_esnp_overlaps.jpeg", units="in", width=12, height=8, res=300)
#pdf("figures/ADH_gw_esnp_overlaps.pdf", width = 12, height = 8)
upset(fromList(list_gw_esnps), sets = c("ADH6", "ADH5", "ADH4", "ADH1C", "ADH1B", "ADH1A"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eQTLs",
mainbar.y.label = "Number of overlapping eQTLs", point.size=4.5, text.scale = 2.2,
group.by = "degree", order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#d9d9d9","#bdbdbd", "#969696", "#737373", "#525252", "#252525"),
number.angles = 0)
#dev.off()
# overlapps between ADH eQTLs having no genome-wide significant trait associations
ADH1A_nogw_esnps <- ADH1A_esnps[!(ADH1A_esnps %in% unique(ADH1A_df_gw$Query))]
ADH1B_nogw_esnps <- ADH1B_esnps[!(ADH1B_esnps %in% unique(ADH1B_df_gw$Query))]
ADH1C_nogw_esnps <- ADH1C_esnps[!(ADH1C_esnps %in% unique(ADH1C_df_gw$Query))]
ADH4_nogw_esnps <- ADH4_esnps[!(ADH4_esnps %in% unique(ADH4_df_gw$Query))]
ADH5_nogw_esnps <- ADH5_esnps[!(ADH5_esnps %in% unique(ADH5_df_gw$Query))]
ADH6_nogw_esnps <- ADH6_esnps[!(ADH1B_esnps %in% unique(ADH6_df_gw$Query))]
list_nogw_esnps <- list("ADH1A" = ADH1A_nogw_esnps, "ADH1B" = ADH1B_nogw_esnps,
"ADH1C" = ADH1C_nogw_esnps, "ADH4" = ADH4_nogw_esnps,
"ADH5" = ADH5_nogw_esnps, "ADH6" = ADH6_nogw_esnps)
# Ordered by degree
#tiff("figures/ADH_nogw_esnp_overlaps.tif", units="in", width=12, height=8, res=300)
#jpeg("figures/ADH_nogw_esnp_overlaps.jpeg", units="in", width=12, height=8, res=300)
#pdf("figures/ADH_nogw_esnp_overlaps.pdf", width = 13, height = 8)
upset(fromList(list_nogw_esnps), sets = c("ADH6", "ADH5", "ADH4", "ADH1C", "ADH1B", "ADH1A"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eQTLs",
mainbar.y.label = "Number of overlapping eQTLs", point.size=4.5, text.scale = 2.2,
group.by = "degree", order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#d9d9d9","#bdbdbd", "#969696", "#737373", "#525252", "#252525"),
number.angles = 0)
#dev.off()
# 14 ALDH genes
# overlapps between ALDH eQTLs having trait associateions
ALDH16A1_gw_esnps <- unique(ALDH16A1_df_gw$Query)
ALDH18A1_gw_esnps <- unique(ALDH18A1_df_gw$Query)
ALDH1A1_gw_esnps <- unique(ALDH1A1_df_gw$Query)
ALDH1B1_gw_esnps <- unique(ALDH1B1_df_gw$Query)
ALDH1L1_gw_esnps <- unique(ALDH1L1_df_gw$Query)
ALDH1L2_gw_esnps <- unique(ALDH1L2_df_gw$Query)
ALDH2_gw_esnps <- unique(ALDH2_df_gw$Query)
ALDH3A1_gw_esnps <- unique(ALDH3A1_df_gw$Query)
ALDH3A2_gw_esnps <- unique(ALDH3A2_df_gw$Query)
ALDH3B1_gw_esnps <- unique(ALDH3B1_df_gw$Query)
ALDH3B2_gw_esnps <- unique(ALDH3B2_df_gw$Query)
ALDH5A1_gw_esnps <- unique(ALDH5A1_df_gw$Query)
ALDH6A1_gw_esnps <- unique(ALDH6A1_df_gw$Query)
ALDH7A1_gw_esnps <- unique(ALDH7A1_df_gw$Query)
list_gw_esnps <- list("ALDH16A1" = ALDH16A1_gw_esnps, "ALDH18A1" = ALDH18A1_gw_esnps,
"ALDH1A1" = ALDH1A1_gw_esnps, "ALDH1B1" = ALDH1B1_gw_esnps,
"ALDH1L1" = ALDH1L1_gw_esnps, "ALDH1L2" = ALDH1L2_gw_esnps,
"ALDH2" = ALDH2_gw_esnps, "ALDH3A1" = ALDH3A1_gw_esnps,
"ALDH3A2" = ALDH3A2_gw_esnps, "ALDH3B1" = ALDH3B1_gw_esnps,
"ALDH3B2" = ALDH3B2_gw_esnps, "ALDH5A1" = ALDH5A1_gw_esnps,
"ALDH6A1" = ALDH6A1_gw_esnps, "ALDH7A1" = ALDH7A1_gw_esnps)
# Ordered by degree
#tiff("figures/ALDH_gw_esnp_overlaps.tif", units="in", width=14, height=11, res=300)
#jpeg("figures/ALDH_gw_esnp_overlaps.jpeg", units="in", width=14, height=11, res=300)
#pdf("figures/ALDH_gw_esnp_overlaps.pdf", width = 14, height = 11)
upset(fromList(list_gw_esnps), sets = c("ALDH7A1", "ALDH6A1", "ALDH5A1", "ALDH3B2", "ALDH3B1",
"ALDH3A2", "ALDH3A1", "ALDH2", "ALDH1L2", "ALDH1L1",
"ALDH1B1", "ALDH1A1", "ALDH18A1", "ALDH16A1"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eQTLs",
mainbar.y.label = "Number of overlapping eQTLs", point.size=4.5, text.scale = 2.2,
group.by = "degree", order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#F5F5F5","#E8E8E8", "#DCDCDC", "#D3D3D3", "#C8C8C8", "#BEBEBE",
"#B0B0B0", "#A8A8A8", "#989898", "#888888", "#787878", "696969",
"#505050", "#303030"),
number.angles = 0)
#dev.off()
# overlapps between ALDH eQTLs having no genome-wide significant trait associations
ALDH16A1_nogw_esnps <- ALDH16A1_esnps[!(ALDH16A1_esnps %in% unique(ALDH16A1_df_gw$Query))]
ALDH18A1_nogw_esnps <- ALDH18A1_esnps[!(ALDH18A1_esnps %in% unique(ALDH18A1_df_gw$Query))]
ALDH1A1_nogw_esnps <- ALDH1A1_esnps[!(ALDH1A1_esnps %in% unique(ALDH1A1_df_gw$Query))]
ALDH1B1_nogw_esnps <- ALDH1B1_esnps[!(ALDH1B1_esnps %in% unique(ALDH1B1_df_gw$Query))]
ALDH1L1_nogw_esnps <- ALDH1L1_esnps[!(ALDH1L1_esnps %in% unique(ALDH1L1_df_gw$Query))]
ALDH1L2_nogw_esnps <- ALDH1L2_esnps[!(ALDH1L2_esnps %in% unique(ALDH1L2_df_gw$Query))]
ALDH2_nogw_esnps <- ALDH2_esnps[!(ALDH2_esnps %in% unique(ALDH2_df_gw$Query))]
ALDH3A1_nogw_esnps <- ALDH3A1_esnps[!(ALDH3A1_esnps %in% unique(ALDH3A1_df_gw$Query))]
ALDH3A2_nogw_esnps <- ALDH3A2_esnps[!(ALDH3A2_esnps %in% unique(ALDH3A2_df_gw$Query))]
ALDH3B1_nogw_esnps <- ALDH3B1_esnps[!(ALDH3B1_esnps %in% unique(ALDH3B1_df_gw$Query))]
ALDH3B2_nogw_esnps <- ALDH3B2_esnps[!(ALDH3B2_esnps %in% unique(ALDH3B2_df_gw$Query))]
ALDH5A1_nogw_esnps <- ALDH5A1_esnps[!(ALDH5A1_esnps %in% unique(ALDH5A1_df_gw$Query))]
ALDH6A1_nogw_esnps <- ALDH6A1_esnps[!(ALDH6A1_esnps %in% unique(ALDH6A1_df_gw$Query))]
ALDH7A1_nogw_esnps <- ALDH7A1_esnps[!(ALDH7A1_esnps %in% unique(ALDH7A1_df_gw$Query))]
list_nogw_esnps <- list("ALDH16A1" = ALDH16A1_nogw_esnps, "ALDH18A1" = ALDH18A1_nogw_esnps,
"ALDH1A1" = ALDH1A1_nogw_esnps, "ALDH1B1" = ALDH1B1_nogw_esnps,
"ALDH1L1" = ALDH1L1_nogw_esnps, "ALDH1L2" = ALDH1L2_nogw_esnps,
"ALDH2" = ALDH2_nogw_esnps, "ALDH3A1" = ALDH3A1_nogw_esnps,
"ALDH3A2" = ALDH3A2_nogw_esnps, "ALDH3B1" = ALDH3B1_nogw_esnps,
"ALDH3B2" = ALDH3B2_nogw_esnps, "ALDH5A1" = ALDH5A1_nogw_esnps,
"ALDH6A1" = ALDH6A1_nogw_esnps, "ALDH7A1" = ALDH7A1_nogw_esnps)
# Ordered by degree
#tiff("figures/ALDH_nogw_esnp_overlaps.tif", units="in", width=14, height=11, res=300)
#jpeg("figures/ALDH_nogw_esnp_overlaps.jpeg", units="in", width=14, height=11, res=300)
#pdf("figures/ALDH_nogw_esnp_overlaps.pdf", width = 14, height = 11)
upset(fromList(list_nogw_esnps), sets = c("ALDH7A1", "ALDH6A1", "ALDH5A1", "ALDH3B2", "ALDH3B1",
"ALDH3A2", "ALDH3A1", "ALDH2", "ALDH1L2", "ALDH1L1",
"ALDH1B1", "ALDH1A1", "ALDH18A1", "ALDH16A1"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eQTLs",
mainbar.y.label = "Number of overlapping eQTLs", point.size=4.5, text.scale = 2.2,
group.by = "degree", order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#F5F5F5","#E8E8E8", "#DCDCDC", "#D3D3D3", "#C8C8C8", "#BEBEBE",
"#B0B0B0", "#A8A8A8", "#989898", "#888888", "#787878", "696969",
"#505050", "#303030"),
number.angles = 0)
#dev.off()
Plot ADH eQTLs associations with alcohol dependence across different tissues.
# Alcohol dependence: ADH1A, ADH1C, ADH4, ADH5
ADH1A_AD <- ADH1A_df_gw[ADH1A_df_gw$GWAS_Trait=="Alcohol dependence", ]
ADH1A_AD_esnps <- unique(ADH1A_AD$Query) # 27
ADH1C_AD <- ADH1C_df_gw[ADH1C_df_gw$GWAS_Trait=="Alcohol dependence", ]
ADH1C_AD_esnps <- unique(ADH1C_AD$Query) # 112
ADH4_AD <- ADH4_df_gw[ADH4_df_gw$GWAS_Trait=="Alcohol dependence", ]
ADH4_AD_esnps <- unique(ADH4_AD$Query) # 102
ADH5_AD <- ADH5_df_gw[ADH5_df_gw$GWAS_Trait=="Alcohol dependence", ]
ADH5_AD_esnps <- unique(ADH5_AD$Query) # 6
ADH1A_AD.df <- ADH1A[ADH1A$snp %in% ADH1A_AD_esnps, ]; length(unique(ADH1A_AD.df$tissue)) # 23
## [1] 23
ADH1C_AD.df <- ADH1C[ADH1C$snp %in% ADH1C_AD_esnps, ]; length(unique(ADH1C_AD.df$tissue)) # 27
## [1] 27
ADH4_AD.df <- ADH4[ADH4$snp %in% ADH4_AD_esnps, ]; length(unique(ADH4_AD.df$tissue)) # 14
## [1] 14
ADH5_AD.df <- ADH5[ADH5$snp %in% ADH5_AD_esnps, ]; length(unique(ADH5_AD.df$tissue)) # 1
## [1] 1
# create a dataframe and populate it with up- and downregulation values
ADH_AD.df <- data.frame(ADH1A = up_down(ADH1A_AD.df), ADH1B = NA, ADH1C = up_down(ADH1C_AD.df),
ADH4 = up_down(ADH4_AD.df), ADH5 = up_down(ADH5_AD.df), ADH6 = NA,
row.names = rownames(t(adh_aldh_mat)))
ADH_AD_mat <- as.matrix(ADH_AD.df)
colors = structure(c("#ED1C24", "#1C75BC", "black"), names = c("up", "down", "both"))
#pdf("figures/adh_ad_gene_regulation_heatmap.pdf", width = 10, height = 16)
ad_gr <- Heatmap(ADH_AD_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ADH1A", "ADH1B", "ADH1C", "ADH4", "ADH5", "ADH6"),
width = unit(5, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(ad_gr, heatmap_legend_side = "bottom")
#dev.off()
Plot ALDH eQTLs associations with psychiatric disorders and cognition across different tissues.
# ALDH2 - anxiety/tension, bipolar disorder, depression, schizophrenia, cognition
# Anxiety/tension
ALDH2_anx <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Anxiety/tension (special factor of neuroticism)", ]
ALDH2_anx_esnps <- unique(ALDH2_anx$Query) # 6
ALDH2_anx.df <- ALDH2[ALDH2$snp %in% ALDH2_anx_esnps, ]; length(unique(ALDH2_anx.df$tissue)) # 1
## [1] 1
ALDH_anx.df <- data.frame(ALDH2 = up_down(ALDH2_anx.df), row.names = rownames(t(adh_aldh_mat)))
ALDH_anx_mat <- as.matrix(ALDH_anx.df)
#pdf("figures/aldh_anx_gene_regulation_heatmap.pdf", width = 10, height = 16)
anx_gr <- Heatmap(ALDH_anx_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(anx_gr, heatmap_legend_side = "bottom")
#dev.off()
# Bipolar disorder
ALDH2_bd <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Bipolar disorder (MTAG)", ]
ALDH2_bd_esnps <- unique(ALDH2_bd$Query) # 23
ALDH2_bd.df <- ALDH2[ALDH2$snp %in% ALDH2_bd_esnps, ]; length(unique(ALDH2_bd.df$tissue)) # 1
## [1] 1
ALDH_bd.df <- data.frame(ALDH2 = up_down(ALDH2_bd.df), row.names = rownames(t(adh_aldh_mat)))
ALDH_bd_mat <- as.matrix(ALDH_bd.df)
#pdf("figures/aldh_bd_gene_regulation_heatmap.pdf", width = 10, height = 16)
bd_gr <- Heatmap(ALDH_bd_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(bd_gr, heatmap_legend_side = "bottom")
#dev.off()
# Depression
ALDH2_de <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Depression", ]
ALDH2_de_esnps <- unique(ALDH2_de$Query) # 9
ALDH2_de.df <- ALDH2[ALDH2$snp %in% ALDH2_de_esnps, ]; length(unique(ALDH2_de.df$tissue)) # 1
## [1] 1
ALDH_de.df <- data.frame(ALDH2 = up_down(ALDH2_de.df), row.names = rownames(t(adh_aldh_mat)))
ALDH_de_mat <- as.matrix(ALDH_de.df)
#pdf("figures/aldh_de_gene_regulation_heatmap.pdf", width = 10, height = 16)
de_gr <- Heatmap(ALDH_de_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(de_gr, heatmap_legend_side = "bottom")
#dev.off()
# Schizophrenia
ALDH2_sc <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Schizophrenia", ]
ALDH2_sc_esnps <- unique(ALDH2_sc$Query) # 62
ALDH2_sc.df <- ALDH2[ALDH2$snp %in% ALDH2_sc_esnps, ]; length(unique(ALDH2_sc.df$tissue)) # 3
## [1] 3
ALDH_sc.df <- data.frame(ALDH2 = up_down(ALDH2_sc.df), row.names = rownames(t(adh_aldh_mat)))
ALDH_sc_mat <- as.matrix(ALDH_sc.df)
#pdf("figures/aldh_sc_gene_regulation_heatmap.pdf", width = 10, height = 16)
sc_gr <- Heatmap(ALDH_sc_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(sc_gr, heatmap_legend_side = "bottom")
#dev.off()
# Cognition
ALDH2_co <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Cognitive ability, years of educational attainment or schizophrenia (pleiotropy)", ]
ALDH2_co_esnps <- unique(ALDH2_co$Query) # 23
ALDH2_co.df <- ALDH2[ALDH2$snp %in% ALDH2_co_esnps, ]; length(unique(ALDH2_co.df$tissue)) # 1
## [1] 1
ALDH_co.df <- data.frame(ALDH2 = up_down(ALDH2_co.df), row.names = rownames(t(adh_aldh_mat)))
ALDH_co_mat <- as.matrix(ALDH_co.df)
#pdf("figures/aldh_co_gene_regulation_heatmap.pdf", width = 10, height = 16)
co_gr <- Heatmap(ALDH_co_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
draw(co_gr, heatmap_legend_side = "bottom")
#dev.off()
# ALDH3A2 - ASD
ALDH3A2_asd <- ALDH3A2_df_gw[ALDH3A2_df_gw$GWAS_Trait=="Autism spectrum disorder", ]
ALDH3A2_asd_esnps <- unique(ALDH3A2_asd$Query) # 2
ALDH3A2_asd.df <- ALDH3A2[ALDH3A2$snp %in% ALDH3A2_asd_esnps, ]
length(unique(ALDH3A2_asd.df$tissue)) # 1
## [1] 1
ALDH3A2_asd.df <- data.frame(ALDH3A2 = up_down(ALDH3A2_asd.df), row.names = rownames(t(adh_aldh_mat)))
ALDH3A2_asd_mat <- as.matrix(ALDH3A2_asd.df)
#pdf("figures/aldh_asd_gene_regulation_heatmap.pdf", width = 10, height = 16)
asd_gr <- Heatmap(ALDH3A2_asd_mat, name = "regulation",
col = colors, na_col = "white",
cluster_rows = FALSE, cluster_columns = FALSE, row_names_side = "left",
column_names_side = "top", column_names_gp = gpar(fontface = "italic"),
column_order = c("ALDH3A2"), #show_row_names = FALSE,
width = unit(1, "cm"), height = unit(22, "cm"),
border = TRUE, rect_gp = gpar(col = "grey"),
heatmap_legend_param = list(legend_direction = "horizontal",
legend_width = unit(5, "cm")))
#draw(asd_gr, heatmap_legend_side = "bottom")
dev.off()
## null device
## 1
Plot ALDH regulatory interactions for bipolar disorder, schizophrenia and cognition in brain frontal cortex and for schizophrenia in brain amygdala.
# Bipolar disorder
ALDH2_bd <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Bipolar disorder (MTAG)", ]
ALDH2_bd_esnps <- unique(ALDH2_bd$Query) # 23 eQTLs - trans-interchromosomal (chr15)
ALDH2_bd.df <- ALDH2[ALDH2$snp %in% ALDH2_bd_esnps, ]
#ALDH_bd.df <- data.frame(ALDH2 = up_down(ALDH2_bd.df), row.names = rownames(t(adh_aldh_mat)))
#ALDH_bd_mat <- as.matrix(ALDH_bd.df)
# Schizophrenia
ALDH2_sc <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Schizophrenia", ]
ALDH2_sc_esnps <- unique(ALDH2_sc$Query) # 62 eQTLs
ALDH2_sc.df <- ALDH2[ALDH2$snp %in% ALDH2_sc_esnps, ]
ALDH2_sc_fc.df <- ALDH2_sc.df[ALDH2_sc.df$tissue == "Brain_Frontal_Cortex_BA9",]
#length(unique(ALDH2_sc_fc.df$snp)) # 23 eQTLs - trans-interchromosomal (chr15)
ALDH2_sc_am.df <- ALDH2_sc.df[ALDH2_sc.df$tissue == "Brain_Amygdala",]
#length(unique(ALDH2_sc_am.df$snp)) # 6 eQTLs - trans-interchromosomal (chr5)
#ALDH_sc.df <- data.frame(ALDH2 = up_down(ALDH2_sc.df), row.names = rownames(t(adh_aldh_mat)))
#ALDH_sc_mat <- as.matrix(ALDH_sc.df)
#writeLines(ALDH2_sc_fc.df$snp, con = "results/ALDH2_eQTLs/ALDH2_eQTLs_FC_BD-SCZ-Cognition.txt", sep = "\n")
#writeLines(ALDH2_sc_am.df$snp, con = "results/ALDH2_eQTLs/ALDH2_eQTLs_Am_SCZ.txt", sep = "\n")
# Cognition
ALDH2_co <- ALDH2_df_gw[ALDH2_df_gw$GWAS_Trait=="Cognitive ability, years of educational attainment or schizophrenia (pleiotropy)", ]
ALDH2_co_esnps <- unique(ALDH2_co$Query) # 23
ALDH2_co.df <- ALDH2[ALDH2$snp %in% ALDH2_co_esnps, ]
#length(unique(ALDH2_co.df$tissue)) # 1
#ALDH_co.df <- data.frame(ALDH2 = up_down(ALDH2_co.df), row.names = rownames(t(adh_aldh_mat)))
#ALDH_co_mat <- as.matrix(ALDH_co.df)
Ploting the chromosome and ADH and ALDH genes. Check out this post –> https://cran.r-project.org/web/packages/RIdeogram/vignettes/RIdeogram.html
data(human_karyotype, package="RIdeogram")
data(gene_density, package="RIdeogram")
genes_ann <- read.table("data/ADH_and_ALDH_genes.txt", sep = "\t", header = T, stringsAsFactors = F)
# in your terminal: ulimit -s; ulimit -s 16384; R --slave -e 'Cstack_info()["size"]'
#ideogram(karyotype = human_karyotype, overlaid = gene_density, label = genes_ann, label_type = "marker", colorset1 = c("white", "#D0D0D0", "black"))
#convertSVG("chromosome.svg", device = "png")
To get rsIDs for ALDH2 eQTLs associated with BD, SCZ and Cognition in frontal cortex and amygdala, we saved the SNPs into the txt files: writeLines(ALDH2_sc_fc.df$snp, con = "results/ALDH2_eQTLs/ALDH2_eQTLs_FC_BD-SCZ-Cognition.txt", sep = "\n") and writeLines(ALDH2_sc_am.df$snp, con = "results/ALDH2_eQTLs/ALDH2_eQTLs_Am_SCZ.txt", sep = "\n")
To get genomic positions for GWAS SNPs according to genome GRCh38p7 build 151, we downloaded dbSNP bed files for each chromosome from dbSNP ftp site, concatenated them and run bash scripts/rsID2Bed.sh results/ALDH2_eQTLs/ALDH2_eQTLs_FC_BD-SCZ-Cognition.txt and bash scripts/rsID2Bed.sh results/ALDH2_eQTLs/ALDH2_eQTLs_Am_SCZ.txt to convert SNP rsIDs to BED files.
The figure on brain-specific regulatory interactions associated with bipolar disorder, schizophrenia and cognition is made using pygenometracks: conda activate pygenometracks
To plot the ALDH2 gene: pyGenomeTracks --tracks aldh2_gtf.ini --region 12:111760000-111830000 --trackLabelFraction 0.2 --width 25 --dpi 300 -o ALDH2.png
To plot ALDH2 eQTLs in frontal cortex: pyGenomeTracks --tracks aldh2_eqtl_fc_gtf.ini --region 15:84394500-84594500 --trackLabelFraction 0.2 --width 25 --dpi 300 -o ALDH2_eQTL_FC.png
To plot ALDH2 eQTLs in amygdala: pyGenomeTracks --tracks aldh2_eqtl_am_gtf.ini --region 5:127700000-128050000 --trackLabelFraction 0.2 --width 25 --dpi 300 -o ALDH2_eQTL_Am.png